## Warning: Missing column names filled in: 'X1' [1]

HIV diagnoses

# Linear regression of hiv_diagnoses on borough, gender, age and mid_income.
# Rows with year "2011" and rows whose age is "All" are removed before
# fitting (presumably "All" is the all-ages aggregate row -- confirm).
income_hiv %>%
  filter(year != "2011", age != "All") %>%
  lm(
    formula = hiv_diagnoses ~ borough + gender + age + mid_income,
    data = .
  ) %>%
  summary()
## 
## Call:
## lm(formula = hiv_diagnoses ~ borough + gender + age + mid_income, 
##     data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -15.106  -3.702  -1.040   2.239  50.426 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           9.835e-01  3.024e-01   3.252  0.00115 ** 
## boroughBrooklyn       2.975e-01  2.807e-01   1.060  0.28922    
## boroughManhattan      3.091e+00  3.313e-01   9.332  < 2e-16 ***
## boroughQueens        -1.245e+00  2.588e-01  -4.811 1.53e-06 ***
## boroughStaten Island -4.376e+00  3.972e-01 -11.016  < 2e-16 ***
## genderMale            6.083e+00  1.515e-01  40.138  < 2e-16 ***
## age20 - 29            9.600e+00  2.625e-01  36.576  < 2e-16 ***
## age30 - 39            6.870e+00  2.625e-01  26.175  < 2e-16 ***
## age40 - 49            4.627e+00  2.625e-01  17.627  < 2e-16 ***
## age50 - 59            2.355e+00  2.625e-01   8.972  < 2e-16 ***
## age60+                4.267e-01  2.625e-01   1.626  0.10406    
## mid_income           -1.238e-04  6.938e-06 -17.851  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.682 on 7764 degrees of freedom
## Multiple R-squared:  0.3594, Adjusted R-squared:  0.3585 
## F-statistic: 395.9 on 11 and 7764 DF,  p-value: < 2.2e-16
# Linear regression of hiv_diagnoses on borough, gender, race and mid_income.
# Rows with year "2011" and rows whose race is "All" are removed before
# fitting (presumably "All" is the all-races aggregate row -- confirm).
income_hiv %>%
  filter(year != "2011", race != "All") %>%
  lm(
    formula = hiv_diagnoses ~ borough + gender + race + mid_income,
    data = .
  ) %>%
  summary()
## 
## Call:
## lm(formula = hiv_diagnoses ~ borough + gender + race + mid_income, 
##     data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -18.319  -5.652  -1.628   2.949  84.026 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           1.514e+00  5.142e-01   2.945  0.00324 ** 
## boroughBrooklyn       3.570e-01  4.929e-01   0.724  0.46898    
## boroughManhattan      3.710e+00  5.818e-01   6.376 1.95e-10 ***
## boroughQueens        -1.494e+00  4.545e-01  -3.287  0.00102 ** 
## boroughStaten Island -5.251e+00  6.976e-01  -7.527 5.90e-14 ***
## genderMale            7.299e+00  2.662e-01  27.425  < 2e-16 ***
## raceBlack             1.093e+01  4.208e-01  25.978  < 2e-16 ***
## raceLatino/Hispanic   9.027e+00  4.208e-01  21.451  < 2e-16 ***
## raceOther/Unknown    -1.380e+00  4.208e-01  -3.278  0.00105 ** 
## raceWhite             3.628e+00  4.208e-01   8.621  < 2e-16 ***
## mid_income           -1.486e-04  1.218e-05 -12.197  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.71 on 6469 degrees of freedom
## Multiple R-squared:  0.2699, Adjusted R-squared:  0.2687 
## F-statistic: 239.1 on 10 and 6469 DF,  p-value: < 2.2e-16
# Interactive scatter plot of mean HIV diagnoses against median income.
# Each point is one (uhf, year) combination; rows with year "2011" are
# excluded. A linear fit (geom_smooth with method "lm") is overlaid.
income_plot <- income_hiv %>%
  filter(year != "2011") %>%
  group_by(uhf, year) %>%
  summarise(
    # The value is a mean, so the column (and hence the y-axis label) is
    # named accordingly instead of the previous, misleading `sum_hiv`.
    mean_hiv = mean(hiv_diagnoses),
    mid_in = median(mid_income)
  ) %>%
  # Drop the grouping left over from summarise() before plotting.
  ungroup() %>%
  ggplot(aes(x = mid_in, y = mean_hiv, color = year)) +
  geom_point() +
  geom_smooth(method = "lm") +
  theme_bw() +
  # ggplot2 documents the lower-case value "none" for hiding the legend;
  # "None" is not a recognised position.
  theme(legend.position = "none")
ggplotly(income_plot)

Income distribution across neighborhoods

# Interactive point plot of mid_income for every row of income_hiv, one
# band per uhf value. coord_flip() puts uhf on the vertical axis, and the
# low alpha makes overlapping points visible as darker areas.
income_dist <- ggplot(income_hiv, aes(x = uhf, y = mid_income)) +
  geom_point(alpha = 0.1) +
  coord_flip() +
  theme_bw()
ggplotly(income_dist)

HIV diagnosis rate

# Linear regression of hiv_diagnosis_rate on borough, gender, age and
# mid_income. Rows with year "2011" and rows whose age is "All" are removed
# before fitting (presumably "All" is the all-ages aggregate row -- confirm).
income_hiv %>%
  filter(year != "2011", age != "All") %>%
  lm(
    formula = hiv_diagnosis_rate ~ borough + gender + age + mid_income,
    data = .
  ) %>%
  summary()
## 
## Call:
## lm(formula = hiv_diagnosis_rate ~ borough + gender + age + mid_income, 
##     data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -80.124 -20.972  -4.021  15.428 210.913 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           1.785e+01  1.511e+00  11.811  < 2e-16 ***
## boroughBrooklyn      -1.208e+01  1.403e+00  -8.611  < 2e-16 ***
## boroughManhattan      1.542e+01  1.656e+00   9.316  < 2e-16 ***
## boroughQueens        -2.262e+01  1.293e+00 -17.492  < 2e-16 ***
## boroughStaten Island -3.052e+01  1.985e+00 -15.377  < 2e-16 ***
## genderMale            3.982e+01  7.573e-01  52.582  < 2e-16 ***
## age20 - 29            4.406e+01  1.312e+00  33.589  < 2e-16 ***
## age30 - 39            3.321e+01  1.312e+00  25.322  < 2e-16 ***
## age40 - 49            2.799e+01  1.312e+00  21.336  < 2e-16 ***
## age50 - 59            1.384e+01  1.312e+00  10.552  < 2e-16 ***
## age60+               -4.261e+00  1.312e+00  -3.249  0.00116 ** 
## mid_income           -6.354e-04  3.467e-05 -18.326  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 33.39 on 7764 degrees of freedom
## Multiple R-squared:  0.4477, Adjusted R-squared:  0.4469 
## F-statistic: 572.2 on 11 and 7764 DF,  p-value: < 2.2e-16
# Linear regression of hiv_diagnosis_rate on borough, gender, race and
# mid_income. Rows with year "2011" and rows whose race is "All" are removed
# before fitting (presumably "All" is the all-races aggregate row -- confirm).
income_hiv %>%
  filter(year != "2011", race != "All") %>%
  lm(
    formula = hiv_diagnosis_rate ~ borough + gender + race + mid_income,
    data = .
  ) %>%
  summary()
## 
## Call:
## lm(formula = hiv_diagnosis_rate ~ borough + gender + race + mid_income, 
##     data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -128.03  -29.36   -5.99   16.43  412.12 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           7.947e-01  2.459e+00   0.323   0.7466    
## boroughBrooklyn      -1.195e+01  2.357e+00  -5.070 4.09e-07 ***
## boroughManhattan      2.214e+01  2.783e+00   7.955 2.10e-15 ***
## boroughQueens        -2.522e+01  2.173e+00 -11.603  < 2e-16 ***
## boroughStaten Island -3.125e+01  3.336e+00  -9.367  < 2e-16 ***
## genderMale            4.948e+01  1.273e+00  38.875  < 2e-16 ***
## raceBlack             6.181e+01  2.012e+00  30.713  < 2e-16 ***
## raceLatino/Hispanic   3.440e+01  2.012e+00  17.095  < 2e-16 ***
## raceOther/Unknown     9.809e+00  2.012e+00   4.874 1.12e-06 ***
## raceWhite             1.298e+01  2.012e+00   6.452 1.19e-10 ***
## mid_income           -1.007e-04  5.827e-05  -1.729   0.0839 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 51.23 on 6469 degrees of freedom
## Multiple R-squared:  0.3569, Adjusted R-squared:  0.3559 
## F-statistic: 358.9 on 10 and 6469 DF,  p-value: < 2.2e-16
# Interactive scatter plot of summed HIV diagnosis rate against median
# income. Each point is one (uhf, year) combination; rows with year "2011"
# are excluded. A linear fit (geom_smooth with method "lm") is overlaid.
income_plot_diag_rate <- income_hiv %>%
  filter(year != "2011") %>%
  group_by(uhf, year) %>%
  summarise(
    # NOTE(review): this adds rates across all rows of a (uhf, year) group,
    # whereas the diagnoses-count plot uses mean(); confirm that a sum of
    # rates is the intended quantity.
    sum_hiv_diagnosis_rate = sum(hiv_diagnosis_rate),
    mid_in = median(mid_income)
  ) %>%
  # Drop the grouping left over from summarise() before plotting.
  ungroup() %>%
  ggplot(aes(x = mid_in, y = sum_hiv_diagnosis_rate, color = year)) +
  geom_point() +
  geom_smooth(method = "lm") +
  theme_bw() +
  # ggplot2 documents the lower-case value "none" for hiding the legend;
  # "None" is not a recognised position.
  theme(legend.position = "none")
ggplotly(income_plot_diag_rate)